# -*- coding: utf-8 -*-

import scrapy
#from test1.items import jobinfoitem, cominfoitem
from test1.items import jobinfoitem, cominfoitem,mittdianxin
from scrapy import Request
from scrapy.loader import ItemLoader
from scrapyluke.processors import *
import re
import time
import datetime

class ChinahrSpider(scrapy.Spider):
    """Spider for the shenpi.miit.gov.cn common-query listing (categoryId=302).

    ``parse`` walks the paginated result list and posts one detail request
    for every ``aId`` input found on a page; ``parse_info`` converts each
    detail response into a ``mittdianxin`` item.
    """

    name = 'dianxin'
    # Not read by the callbacks below; kept so that any outside reference to
    # this class attribute keeps resolving.
    cominfo = cominfoitem()
    start_urls = ['http://shenpi.miit.gov.cn/commonquery/result?categoryId=302&pagenow=1']

    _LIST_URL = 'http://shenpi.miit.gov.cn/commonquery/result?categoryId=302&pagenow=%d'
    _DETAIL_URL = "http://shenpi.miit.gov.cn/commonquery/showDetail"
    # Fixed value stored in every item's ``url`` field.
    _SOURCE_URL = 'http://www.miit.gov.cn/n1146300/n1306936/n1307266/n1307270/index.html'
    _PAGE_RE = re.compile(r'oryId=302&pagenow=(\d+)')

    # (keyword looked for in ``deffieldName``, item field that receives the
    # matching ``deffieldValue``).  Order matters: the first keyword found in
    # a label wins.  The literals are unicode so the substring test compares
    # text with text regardless of interpreter version.
    _FIELD_KEYWORDS = (
        (u'许可证编号', 'com_code'),   # licence number
        (u'司名称', 'com_name'),       # company name
        (u'种类', 'bus_kind'),         # business kind
        (u'范围', 'bus_arr'),          # business scope
        (u'有效期', 'com_vp'),         # validity period
    )

    def parse(self, response):
        """Handle one result-list page.

        Yields a ``FormRequest`` to the detail endpoint for every ``aId``
        value on the page, followed by a ``Request`` for the next page.
        Nothing is yielded when the page contains no ``aId`` inputs, which
        is what ends the crawl.
        """
        # Collect the aId of every company listed on this page.
        aids = response.xpath('//input[@name="aId"]/@value').extract()
        if not aids:
            return

        for aid in aids:
            # The detail endpoint is queried by posting the aId.
            yield scrapy.FormRequest(self._DETAIL_URL,
                                     formdata={'aid': str(aid)},
                                     callback=self.parse_info)

        # Pagination: the current page number is read back from the URL.
        # If it is missing (e.g. the request was redirected) there is no
        # reliable next page, so stop instead of raising IndexError.
        page_match = self._PAGE_RE.search(response.url)
        if page_match is None:
            return
        next_page = int(page_match.group(1)) + 1
        yield scrapy.Request(self._LIST_URL % next_page, callback=self.parse)

    @staticmethod
    def _decode_entries(body):
        """Parse a detail response body into Python data without executing it.

        The body comes from a remote server, so it must never be passed to
        ``eval``.  JSON is tried first; if that fails the text is parsed as a
        Python literal, which accepts the same literal syntax ``eval`` did.

        Raises:
            ValueError, SyntaxError: if the body is neither valid JSON nor a
                valid Python literal.
        """
        # Function-scope imports keep this block self-contained.
        import ast
        import json

        text = body.decode('utf-8') if isinstance(body, bytes) else body
        try:
            return json.loads(text)
        except ValueError:
            return ast.literal_eval(text.strip())

    def parse_info(self, response):
        """Build a ``mittdianxin`` item from one detail response.

        The response body is expected to be a sequence of dicts that all
        share the keys ``deffieldName`` (a label) and ``deffieldValue``.
        Each label is matched against ``_FIELD_KEYWORDS`` to decide which
        item field the value belongs to; fields with no matching label are
        loaded as empty strings.

        Returns:
            The loaded item, or ``None`` when the response body is empty.
        """
        if not response.body:
            return None

        entries = self._decode_entries(response.body)

        # Every known field starts out empty so the item always carries it.
        values = {field: '' for _keyword, field in self._FIELD_KEYWORDS}
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            label = entry.get('deffieldName')
            if not label:
                continue
            for keyword, field in self._FIELD_KEYWORDS:
                if keyword in label:
                    value = entry.get('deffieldValue')
                    values[field] = '' if value is None else value
                    break

        loader = ItemLoader(item=mittdianxin())
        loader.add_value('insert_time', str(datetime.datetime.now()))
        loader.add_value('url', self._SOURCE_URL)
        for _keyword, field in self._FIELD_KEYWORDS:
            loader.add_value(field, values[field])
        return loader.load_item()
